home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
PsL Monthly 1993 December
/
PSL Monthly Shareware CD-ROM (December 1993).iso
/
prgmming
/
dos
/
basic
/
doublhsh.bas
< prev
next >
Wrap
BASIC Source File
|
1990-03-24
|
8KB
|
189 lines
'
' Abstract
' --------
'
' Double hashing is a useful method for data compression when
' it is necessary to search for strings previously encountered
' in the uncompressed data. Double hashing performs better than
' linear probing when the hash table gets full, and is usually
' faster than linked lists.
'
'
' Description
' -----------
'
' This double hashing method is in Power BASIC v 1.1 which
' supports modulus arithmetic on long and quad integers.
' The code was translated from the pseudo Pascal source code in
'
' Robert Sedgewick. ALGORITHMS. Addison-Wesley.
' 1983 ed. pp 203/10.
'
' and was modified to hash 32 K items into a table size of
' 32 K entries, occupying about 64 K RAM for the table.
'
' Of incidental interest to the method are the following lists
' of prime numbers near to but not exceeding the base-2 boundaries
' of 8 K, 16 K, 32 K, and 64 K. (See the textbook for why.)
'
' 8101, 8111, 8117, 8123, 8147, 8161, 8171, 8179, 8191
'
' 16301, 16319, 16333, 16339, 16349, 16361, 16363, 16369, 16381
'
' 32693, 32707, 32713, 32717, 32719, 32749
'
' 65497, 65519, 65521
'
'
' For more details and other data compression material, contact:
'
' Colin James III, CQA
' Certified Quality Analyst
' 1975 Oak St, #4
' Lakewood, CO 80215-2737
'
' DATA: (303) 234 - 0085 CEC Services BBS, 9600 V.32c MNP 9, 8-N-1
'
' Specializing in: data compression, Ada, BASIC,
' SQA/SQC, and DoD-STD-2167A
'
' New callers may download the public catalog list
' of files (occupying about 130 MB of over 2 GB).
'
' Subscription for general access is $ 10 per month.
'
' Specialty access to compression source code is $150
' per month where a significant, two-pass improvement on
' LZW has been developed as LZJ, is now available, and
' when a generalized mathematical description is completed
' is due to be patented as an unique apparatus.
' (LZJ compresses about 15 % better than the products now
' available; speed is relatively faster in the PC model.)
'
' Please note:
' CEC Services can NOT be reached on either of the
' major computer services, only at the number above.
'
' VOICE: (303) 234 - 0084 ( 4 PM to 8 PM Mountain Time O N L Y )
'
' ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
'
' Double-Hash v 02
' -----------
'
' Copr 1990, Colin James III All Rights Reserved
' Permission to copy is hereby granted
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
' set up data type definitions
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
DEFINT I ' define I for integers
DEFLNG L ' define L for long integers
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
' set up Lempel-Ziv [ LZ ] table data structure
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
DIM I.LZ.prefix ( 0: 32766) ' LZ codes are broken into prefix
' codes or sequence numbers of
' 15-bits ( 0 ... 32766)
DIM I.LZ.suffix ( 0: 32766) ' and suffix codes or input bytes
' following the prefix
'
' Two integer arrays are used due to the single array size limitation of
' 64 K in Power BASIC (the successor to Turbo BASIC). Indexing is the
' same in the respective arrays. The sequential codes entered into the
' arrays could of course be output as variable length codes as the method
' Lempel-Ziv-Welch or LZW. (The variable code would be the prefix only,
' followed by an 8-bit byte suffix.) These arrays are necessary to have
' below for checking a hash hit. As Sedgewick notes, a hash table which
' is about 90% full can take 50 compares for a linear probe but only 10
' compares with double hashing.
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
' set up hash table
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LET I.prime.max = 32719 ' large prime for hash table size = M
LET I.prime.nxt = I.prime.max - 2 ' secondary hash increment = M - 2
LET I.hasht.max = I.prime.max ' max size of hash table
LET I.hasht.min = 0 ' min size of hash table
DIM I.hash.table( I.hasht.min: I.hasht.max)
' long int hash table, 0 ... 32719
LET L.max.val = ( 2 ^ 22) - 1 ' largest numeric value to hash
' is 8,388,607
LET I.sen.val = 32767 ' sentinel value > i.hasht.max
FOR I = 0 TO I.hasht.max ' loop to initialize hash table to
LET I.hash.table( I) = I.sen.val ' sentinel value of 32767
NEXT i
LET I.prefix = 0 ' initialize prefix code to a
' sequence number of zero
LET I.seq.no = -1 ' initialize sequence number so
' that subsequent increment of
' seq no + 1 (or 0) is the first
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
' get next input byte to compress
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LET I.prefix = 8191 ' sample prefix value is an arbitrary
' sequence number in the LZW table
' and refers to a compressed string
LET I.suffix = 255 ' next input byte to compress
LET L.search.val = I.prefix * 256 + I.suffix
' this builds a numeric value to hash
' ( = 2,097,151)
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
' hashsearch
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
LET I.hash01 = L.search.val MOD I.prime.max
' first hash value calculated
LET I.hash02 = I.prime.nxt - ( L.search.val MOD I.prime.nxt)
' second hash value calculated
LET I.hash03 = I.has01 ' third hash value initialized
WHILE ( I.hash.table( I.hash03) <> I.sen.val) _
AND ( I.LZ.prefix( I.hash.table( I.hash03)) <> I.prefix ) _
AND ( I.LZ.suffix( I.hash.table( I.hash03)) <> I.suffix )
' this loop searches for the next
' available empty hash slot
LET I.hash03 = ( I.hash03 + I.hash02) MOD I.prime.max
WEND
LET I.hash.result = I.hash.table( I.hash03)
IF I.hash.result <> I.sen.val THEN
LET I.prefix = I.hash.result ' result is the sequence no
ELSE
LET I.seq.no = I.seq.no + 1 ' increment sequence no of the
' next code to be inserted
LET I.hash.table( I.hash03) = I.seq.no
' insert sequence no of code
' into hash table
LET I.LZ.prefix( I.seq.no) = I.prefix
' build compression output table
LET I.LZ.suffix( I.seq.no) = I.suffix
' build compression output table
LET I.prefix = 0 ' clear prefix code
END IF
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
'jump where next byte is input for compression & prefix + suffix catenated
' - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
END